# Load every package used by this analysis in a single call.
pacman::p_load(
  jsonlite, tidygraph, ggraph,
  visNetwork, graphlayouts, ggforce, writexl,
  skimr, tidytext, tidyverse, igraph, RColorBrewer
)

# Take Home Exercise 3 ----
# Read the raw MC3 JSON; the code below uses its `links` and `nodes` elements.
mc3_data <- fromJSON("data/MC3.json")

# Edge table: one row per (source, target, type) combination with a `weights`
# count, excluding self-loops (source == target).
# NOTE(review): `distinct()` runs before the count, so `weights` can only exceed
# 1 if `links` carries columns beyond source/target/type -- confirm intent.
mc3_edges <- as_tibble(mc3_data$links) %>%
  distinct() %>%
  mutate(
    source = as.character(source),
    target = as.character(target),
    type = as.character(type)
  ) %>%
  group_by(source, target, type) %>%
  # `.groups = "drop"` returns an ungrouped result directly, which silences the
  # "summarise() has grouped output" message and replaces the trailing
  # `ungroup()`.
  summarise(weights = n(), .groups = "drop") %>%
  filter(source != target)
# Node table: text fields coerced to character, revenue coerced to numeric,
# columns kept in the order id, country, type, revenue_omu, product_services.
mc3_nodes <- mc3_data$nodes %>%
  as_tibble() %>%
  mutate(
    across(c(country, id, product_services, type), as.character),
    # Values that cannot be parsed as numbers become NA (see warning below).
    revenue_omu = as.numeric(as.character(revenue_omu))
  ) %>%
  select(id, country, type, revenue_omu, product_services)
#> Warning: There was 1 warning in `mutate()`.
#> ℹ In argument: `revenue_omu = as.numeric(as.character(revenue_omu))`.
#> Caused by warning:
#> ! NAs introduced by coercion
# Interactive table previews of the edge and node tables.
mc3_edges %>%
  DT::datatable()
#> Warning in instance$preRenderHook(instance): It seems your data is too big for
#> client-side DataTables. You may consider server-side processing:
#> https://rstudio.github.io/DT/server.html

mc3_nodes %>%
  DT::datatable()
#> Warning in instance$preRenderHook(instance): It seems your data is too big for
#> client-side DataTables. You may consider server-side processing:
#> https://rstudio.github.io/DT/server.html
# Split each node's `product_services` text into one token per row (`word`).
token_nodes <- mc3_nodes %>%
  unnest_tokens(word, product_services)

# Remove stop words. The join key is spelled out so the join does not emit
# the "Joining with `by = join_by(word)`" message.
stopwords_removed <- token_nodes %>%
  anti_join(stop_words, by = "word")

# Frequency table of the remaining words, most frequent first.
unique_words <- stopwords_removed %>%
  count(word, sort = TRUE)

# Export the word counts next to the input data. A project-relative path is
# used (like "data/MC3.json" above) so the script runs on any machine.
write_xlsx(unique_words, "data/words.xlsx")

# Source endpoints of every edge, exposed under the column name `id`.
id1 <- mc3_edges %>%
  select(id = source)
# Target endpoints of every edge, exposed under the column name `id`.
id2 <- mc3_edges %>%
  select(id = target)

# Node table restricted to ids that actually appear in an edge, enriched with
# the attributes from `mc3_nodes` where available. The join key is explicit
# (no "Joining with `by = join_by(id)`" message); `unmatched = "drop"` was
# removed because it is already the default for `left_join()`.
mc3_nodes1 <- bind_rows(id1, id2) %>%
  distinct() %>%
  left_join(mc3_nodes, by = "id")
# Directed tidygraph object built from the cleaned node and edge tables.
mc3_graph <- tbl_graph(
  nodes = mc3_nodes1,
  edges = mc3_edges,
  directed = TRUE
)

# Edge table extracted from the graph.
# `as_tibble()` replaces `as.tibble()`, which was deprecated in tibble 2.0.0.
edges_df <- mc3_graph %>%
  activate(edges) %>%
  as_tibble()

# Node table in visNetwork shape: the original identifier becomes `label`
# and `id` is the row position of the node in the graph.
nodes_df <- mc3_graph %>%
  activate(nodes) %>%
  as_tibble() %>%
  rename(label = id) %>%
  mutate(id = row_number()) %>%
  select(id, label)

# igraph version of the network, treated as undirected. Vertex names come from
# the first column of `nodes_df` (`id`), as the printout below shows.
g <- graph_from_data_frame(
  d = edges_df,
  vertices = nodes_df,
  directed = FALSE
)
g
#> IGRAPH d572a7d UN-- 37324 24036 --
#> + attr: name (v/c), label (v/c), type (e/c), weights (e/n)
#> + edges from d572a7d (vertex names):
#> [1] 1 --16060 1 --16061 2 --16062 3 --16063 4 --16064 4 --16065 5 --16066
#> [8] 5 --16067 5 --16068 5 --16069 7 --16070 8 --16071 9 --16072 10--16073
#> [15] 11--16074 11--16075 11--16076 12--16077 13--16078 13--16079 13--16080
#> [22] 13--16081 13--16082 14--16083 14--16084 14--16085 15--16086 16--16087
#> [29] 16--16088 16--16089 16--16090 16--16091 17--16092 17--16093 18--16094
#> [36] 18--16095 18--16096 18--16097 18--16098 19--16099 20--16100 20--16101
#> [43] 22--16102 23--16103 23--16104 23--16105 23--16106 23--16107 23--16108
#> [50] 23--16109 23--16110 23--16111 23--16112 23--16113 23--16114 23--16115
#> + ... omitted several edges
# Degree of every vertex in `g`; the result is named by vertex name.
degree_centrality <- degree(g)

# Attach each node's degree to `nodes_df`. The vertices of `g` are named by
# `id` (not by `label`), so the lookup must use `id`; looking up by `label`
# yields NA wherever the label is not itself a vertex name.
nodes_df$degree_centrality <-
  unname(degree_centrality[as.character(nodes_df$id)])

head(sort(degree_centrality, decreasing = TRUE))
#> 13903 3483 9501 9903 14267 2053
#> 120 91 72 65 62 54

# 22-step orange palette, reversed so that index 1 is the darkest shade.
mc3_colors_centrality <- rev(colorRampPalette(brewer.pal(9, "Oranges"))(22))

# Rank nodes by degree (rank 1 = highest degree) and colour them accordingly.
# * `n() + 1` replaces the hard-coded 37325 so the ranking follows the actual
#   number of nodes.
# * Ranks run from 1 to n(), far beyond the palette length, so indexing the
#   palette by rank directly gives NA for almost every node. Ranks are
#   therefore scaled into 1..length(palette) before the lookup.
nodes_df <- nodes_df %>%
  mutate(
    degree_rank = n() + 1 - floor(rank(degree_centrality)),
    color.background = mc3_colors_centrality[
      pmin(
        ceiling(degree_rank / n() * length(mc3_colors_centrality)),
        length(mc3_colors_centrality)
      )
    ]
  )
# Interactive network widget titled "Degree Centrality", built step by step.
network_degree <- visNetwork(
  nodes_df,
  edges_df,
  height = "350px",
  width = "100%",
  main = "Degree Centrality"
)

# Fixed seed for the layout.
network_degree <- visLayout(network_degree, randomSeed = 21)

# Selection helpers: neighbour highlighting, a node-id dropdown and a
# dropdown driven by `degree_rank`.
network_degree <- visOptions(
  network_degree,
  highlightNearest = TRUE,
  nodesIdSelection = TRUE,
  selectedBy = "degree_rank"
)

# Interaction settings: dragging, zooming and on-screen navigation buttons;
# edges are hidden while the view is dragged.
network_degree <- visInteraction(
  network_degree,
  hideEdgesOnDrag = TRUE,
  dragNodes = TRUE,
  dragView = TRUE,
  zoomView = TRUE,
  navigationButtons = TRUE
)

network_degree